library(data.table)
## Warning: package 'data.table' was built under R version 3.2.5
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.4
library(plotly)
## Warning: package 'plotly' was built under R version 3.2.5
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:graphics':
##
## layout
# Load the 2015 salary disclosure CSV (machine-specific absolute path) and keep
# strings as plain character vectors.
rawData <- read.csv("C:/Users/Matthew/Documents/SunshineList/Inputs/2015-combined-salary-seconded-en.csv", stringsAsFactors = FALSE)
# Strip "$" and "," from column 4 and convert it to numeric.
# NOTE(review): column 4 is presumably Salary.Paid (it is plotted as numeric
# below) -- confirm against the CSV header.
rawData[, 4] <- as.numeric(gsub("[$,]", "", rawData[, 4]))
# Restrict the analysis to the "Universities" sector.
rawData_universities <- rawData[rawData$Sector == "Universities", ]
# One salary histogram per university. The employer list is computed once, and
# seq_along() makes the loop a no-op (rather than iterating over c(1, 0)) when
# no university rows are present.
university_names <- unique(rawData_universities$Employer)
for (i in seq_along(university_names)) {
  print(university_names[i])
  filtered_df <- rawData_universities[rawData_universities$Employer == university_names[i], ]
  # Title each histogram with its employer so the plots can be told apart.
  hist(filtered_df$Salary.Paid, main = university_names[i], xlab = "Salary Paid ($)")
}
## [1] "Algoma University"
## [1] "Brescia University College"
## [1] "Brock University"
## [1] "Carleton University"
## [1] "Huntington University"
## [1] "Huron University College"
## [1] "King's University College"
## [1] "Lakehead University"
## [1] "Laurentian University of Sudbury"
## [1] "McMaster Divinity College"
## [1] "McMaster University"
## [1] "Nipissing University"
## [1] "Northern Ontario School of Medicine"
## [1] "Ontario College of Art & Design University"
## [1] "Queen's University"
## [1] "Ryerson University"
## [1] "Saint Paul University / Université Saint-Paul"
## [1] "St. Jerome's University"
## [1] "St. Peter's Seminary"
## [1] "Thorneloe University"
## [1] "Trent University"
## [1] "Trinity College"
## [1] "Université de Hearst"
## [1] "University of Ottawa"
## [1] "University of Guelph"
## [1] "University of Ontario Institute of Technology"
## [1] "University of St. Michael's College"
## [1] "University of Sudbury"
## [1] "University of Toronto"
## [1] "University of Waterloo"
## [1] "University of Western Ontario"
## [1] "University of Windsor"
## [1] "Victoria University"
## [1] "Wilfrid Laurier University"
## [1] "York University"
# boxplot(x = rawData_universities$Salary.Paid, data = filtered_df, formula = rawData_universities$Salary.Paid ~ rawData_universities$Employer)
top_universities <- c("University of Toronto", "University of Ottawa", "University of Waterloo", "University of Western Ontario")
# Partition the employers (in order of first appearance) into three
# non-overlapping groups so each boxplot stays readable: positions 1-12,
# 13-25 and 26 onwards. Filtering on the position vector, instead of fixed
# index ranges, avoids NA padding when there are fewer employers than a range
# expects and keeps position 25 out of the third group.
university_names <- unique(rawData_universities$Employer)
university_position <- seq_along(university_names)
set1 <- university_names[university_position <= 12]
set2 <- university_names[university_position > 12 & university_position <= 25]
set3 <- university_names[university_position > 25]
# Find the distribution of salaries across schools
rawData_universities_split_1 <- subset(rawData_universities, Employer %in% set1)
rawData_universities_split_2 <- subset(rawData_universities, Employer %in% set2)
rawData_universities_split_3 <- subset(rawData_universities, Employer %in% set3)
# All employers in one plot first, then one plot per group.
# las = 2 draws the axis labels perpendicular to the axis.
boxplot(Salary.Paid ~ Employer, data = rawData_universities, las = 2)
boxplot(Salary.Paid ~ Employer, data = rawData_universities_split_1, las = 2)
boxplot(Salary.Paid ~ Employer, data = rawData_universities_split_2, las = 2)
boxplot(Salary.Paid ~ Employer, data = rawData_universities_split_3, las = 2)
# Salary boxplot per employer, with the employer labels rotated 90 degrees.
employer_salary_boxplot <- ggplot(rawData_universities, aes(x = factor(Employer), y = Salary.Paid)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
# Full salary range.
employer_salary_boxplot
# Same plot zoomed to 0-500000. coord_cartesian() only changes the visible
# window; ylim() would discard salaries above the limit before the boxplot
# statistics are computed.
employer_salary_boxplot + coord_cartesian(ylim = c(0, 500000))
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).
# Add an all-NA City column to the universities table.
rawData_universities[["City"]] <- NA
# Find the distribution of salaries vs roles (for Brock, e.g.)
is_brock <- rawData_universities[["Employer"]] == "Brock University"
rawData_universities_Brock <- rawData_universities[is_brock, ]
# Count of Brock rows per job title.
table(rawData_universities_Brock[["Job.title"]])
##
## Acting Associate Dean
## 1
## Application Architect
## 1
## Assistant Professor
## 48
## Assistant Professor / Director, Applied Linguistics
## 1
## Associate Dean
## 10
## Associate Director, Total Rewards
## 1
## Associate Librarian
## 1
## Associate Professor
## 241
## Associate Professor / Chair
## 21
## Associate Professor / Chair / Director, Visual Arts
## 1
## Associate Professor / Director, Bio-Technology
## 1
## Associate Professor / Director, Centre for Labour Studies
## 1
## Associate Professor / Director, Centre for Liberal Arts
## 1
## Associate Professor / Director, Centre for Medieval & Renaissance Studies
## 1
## Associate Professor / Director, English Language & Literature
## 1
## Associate Professor / Director, Environmental Sustainability Research Centre
## 1
## Associate Professor / Director, Recreation & Leisure Studies
## 1
## Associate Professor / Director, School Of Fine & Performing Arts
## 1
## Associate Vice-President, Facilities Management
## 1
## Associate Vice-President, Finance
## 1
## Associate Vice-President, Human Resources
## 2
## Associate Vice-President, Information Technology Services
## 1
## Associate Vice-President, Research
## 1
## Associate Vice-President, University Services
## 1
## Dean
## 6
## Director & International Liaison Officer
## 1
## Director, Accounting And Treasury
## 1
## Director, Alumni And Donor Relations
## 1
## Director, Application Development
## 1
## Director, Campus Security Services
## 1
## Director, Centre Continuing Teacher Education / Associate Professor
## 1
## Director, Centre for Pedagogical Innovation
## 1
## Director, Client Services
## 1
## Director, Co-Op Programs
## 1
## Director, Co-op, Career And Experiential Education
## 1
## Director, Communications & Marketing
## 1
## Director, Cool Climate Oenology Viticulture Institute / Associate Professor
## 1
## Director, Custodial & Grounds Services
## 1
## Director, Development
## 1
## Director, English as Second Language Services
## 1
## Director, Faculty Relations
## 1
## Director, Financial Strategy & Operations
## 1
## Director, Graduate Studies
## 1
## Director, Health, Safety & Wellness
## 1
## Director, Information Technology Infrastructure
## 1
## Director, Innovation And Commercialization
## 1
## Director, International Market Development
## 1
## Director, International Recruitment
## 1
## Director, Maintenance & Utilities Services
## 1
## Director, Procurement Services
## 1
## Director, Recreation Services
## 1
## Director, Residence
## 1
## Director, Special Projects
## 1
## Director, Student Development Centre
## 1
## Director, Systems And Special Projects
## 1
## Enterprise Database Admin
## 1
## Executive Director, Development & Alumni Relations
## 1
## Head, Collections Services / Librarian III
## 1
## Information Technology Manager, Data Architect
## 1
## Interim Associate Dean
## 1
## Interim Associate University Librarian
## 1
## Interim Dean
## 4
## Interim Director, Athletics & Recreation
## 1
## Interim University Librarian
## 1
## Lecturer
## 15
## Librarian Head / Librarian III
## 1
## Librarian III
## 4
## Librarian IV
## 1
## Manager, Athletic Therapy Clinic
## 1
## Manager, Compensation
## 1
## Manager, Mechanical Services and Associate Director, Operations & Maintenance Services
## 1
## Manager, Technical Services
## 1
## President
## 1
## Professor
## 144
## Professor / Chair
## 12
## Professor / Director, Centre for Applied Disability Studies
## 1
## Professor / Director, Centre for Neuroscience
## 1
## Professor / Director, Teacher Education
## 3
## Provost And Vice-President Academic
## 1
## Registrar / Interim Associate Vice-President, Enrollment
## 1
## Special Collection Librarian & University Archivist / Librarian III
## 1
## Vice-President, Finance & Administration
## 1
## Vice-President, Research
## 1
## Vice-Provost & Associate Vice-President, Academic
## 1
## Vice-Provost, Teaching And Learning
## 1
# Plot the number of Brock rows per job title.
plot(table(rawData_universities_Brock$Job.title))
# Rotated x-axis labels shared by both Brock boxplots.
brock_axis_theme <- theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
# Salary boxplot for every Brock job title.
ggplot(rawData_universities_Brock, aes(x = factor(Job.title), y = Salary.Paid)) +
  geom_boxplot() +
  brock_axis_theme +
  ylim(0, 500000)
# Despite the "noDup" name, this keeps only the rows whose job title occurs
# more than once at Brock.
brock_titles <- rawData_universities_Brock$Job.title
shared_titles <- brock_titles[duplicated(brock_titles)]
rawData_universities_Brock_noDup <- rawData_universities_Brock[brock_titles %in% shared_titles, ]
# Order the job-title factor levels by median salary.
rawData_universities_Brock_noDup$Job.title <- reorder(
  rawData_universities_Brock_noDup$Job.title,
  rawData_universities_Brock_noDup$Salary.Paid,
  median
)
ggplot(rawData_universities_Brock_noDup, aes(x = factor(Job.title), y = Salary.Paid)) +
  geom_boxplot() +
  brock_axis_theme +
  ylim(0, 500000)
# Create the table condensing down to school
table(rawData_universities[["Employer"]])
##
## Algoma University
## 43
## Brescia University College
## 25
## Brock University
## 583
## Carleton University
## 778
## Huntington University
## 8
## Huron University College
## 30
## King's University College
## 88
## Lakehead University
## 300
## Laurentian University of Sudbury
## 385
## McMaster Divinity College
## 3
## McMaster University
## 1137
## Nipissing University
## 104
## Northern Ontario School of Medicine
## 37
## Ontario College of Art & Design University
## 108
## Queen's University
## 995
## Ryerson University
## 1046
## Saint Paul University / Université Saint-Paul
## 16
## St. Jerome's University
## 24
## St. Peter's Seminary
## 2
## Thorneloe University
## 7
## Trent University
## 249
## Trinity College
## 14
## Université de Hearst
## 4
## University of Guelph
## 830
## University of Ontario Institute of Technology
## 194
## University of Ottawa
## 1400
## University of St. Michael's College
## 20
## University of Sudbury
## 9
## University of Toronto
## 3288
## University of Waterloo
## 1295
## University of Western Ontario
## 1298
## University of Windsor
## 546
## Victoria University
## 34
## Wilfrid Laurier University
## 556
## York University
## 1609
# Median salary per university. Note that setDT() converts
# rawData_universities to a data.table by reference, so the original object
# changes class here as well.
summary_university <- setDT(rawData_universities)[, list(Median.Salary = median(Salary.Paid)), by = Employer]
# City of each employer, keyed by employer name so the assignment no longer
# depends on the order in which employers first appear in the data.
university_city <- c(
  "Algoma University" = "Sault Ste Marie",
  "Brescia University College" = "London",
  "Brock University" = "Ste Catharines",
  "Carleton University" = "Ottawa",
  "Huntington University" = "Sudbury",
  "Huron University College" = "London",
  "King's University College" = "London",
  "Lakehead University" = "Thunder Bay",
  "Laurentian University of Sudbury" = "Sudbury",
  "McMaster Divinity College" = "Hamilton",
  "McMaster University" = "Hamilton",
  "Nipissing University" = "North Bay",
  "Northern Ontario School of Medicine" = "Sudbury",
  "Ontario College of Art & Design University" = "Toronto",
  "Queen's University" = "Kingston",
  "Ryerson University" = "Toronto",
  "Saint Paul University / Université Saint-Paul" = "Ottawa",
  "St. Jerome's University" = "Waterloo",
  "St. Peter's Seminary" = "London",
  "Thorneloe University" = "Sudbury",
  "Trent University" = "Peterborough",
  "Trinity College" = "Toronto",
  "Université de Hearst" = "Hearst",
  "University of Ottawa" = "Ottawa",
  "University of Guelph" = "Guelph",
  "University of Ontario Institute of Technology" = "Oshawa",
  "University of St. Michael's College" = "Toronto",
  "University of Sudbury" = "Sudbury",
  "University of Toronto" = "Toronto",
  "University of Waterloo" = "Waterloo",
  "University of Western Ontario" = "London",
  "University of Windsor" = "Windsor",
  "Victoria University" = "Toronto",
  "Wilfrid Laurier University" = "Waterloo",
  "York University" = "Toronto"
)
summary_university$City <- unname(university_city[summary_university$Employer])
# Surface employers missing from the lookup instead of silently mislabelling them.
unmapped_employers <- summary_university$Employer[is.na(summary_university$City)]
if (length(unmapped_employers) > 0) {
  warning("No city on record for: ", paste(unmapped_employers, collapse = ", "), call. = FALSE)
}
# Average of the per-university medians within each city, drawn as a bar chart.
summary_city <- summary_university[, list(Average.Salary = mean(Median.Salary)), by = City]
barplot(height = summary_city$Average.Salary, names.arg = summary_city$City, las = 2)
# Work on a data.table copy of the universities rows and fill in City for
# every row from its employer.
rawData_universities_dt <- data.table(rawData_universities)
# City -> employers located there.
employers_by_city <- list(
  "Toronto" = c(
    "University of Toronto",
    "York University",
    "Ryerson University",
    "University of St. Michael's College",
    "Victoria University",
    "Ontario College of Art & Design University",
    "Trinity College"
  ),
  "Ottawa" = c(
    "University of Ottawa",
    "Carleton University",
    "Saint Paul University / Université Saint-Paul"
  ),
  "London" = c(
    "University of Western Ontario",
    "Brescia University College",
    "Huron University College",
    "King's University College",
    "St. Peter's Seminary"
  ),
  "Sault Ste Marie" = "Algoma University",
  "Ste Catharines" = "Brock University",
  "Sudbury" = c(
    "Huntington University",
    "Laurentian University of Sudbury",
    "Northern Ontario School of Medicine",
    "Thorneloe University",
    "University of Sudbury"
  ),
  "Thunder Bay" = "Lakehead University",
  "Hamilton" = c(
    "McMaster University",
    "McMaster Divinity College"
  ),
  "North Bay" = "Nipissing University",
  "Kingston" = "Queen's University",
  "Guelph" = "University of Guelph",
  "Hearst" = "Université de Hearst",
  "Oshawa" = "University of Ontario Institute of Technology",
  "Peterborough" = "Trent University",
  "Waterloo" = c(
    "St. Jerome's University",
    "University of Waterloo",
    "Wilfrid Laurier University"
  ),
  "Windsor" = "University of Windsor"
)
# Rows whose employer is not listed above keep their existing City value.
for (city_name in names(employers_by_city)) {
  in_city <- rawData_universities_dt$Employer %in% employers_by_city[[city_name]]
  rawData_universities_dt$City[in_city] <- city_name
}
# Median salary per city over all individual rows, sorted ascending for the
# bar chart.
summary_city <- rawData_universities_dt[, list(Median.Salary = median(Salary.Paid)), by = City]
summary_city <- summary_city[order(Median.Salary)]
barplot(height = summary_city$Median.Salary, names.arg = summary_city$City, las = 2, ylab = "Median Salary for City ($)")
# Turn City into a factor whose levels are ordered by median salary, so the
# boxplots below appear in that order.
rawData_universities_dt$City <- with(rawData_universities_dt, reorder(City, Salary.Paid, median))
# Salary boxplot per city, zoomed to 0-500000. coord_cartesian() restricts only
# the visible window; ylim() would drop salaries above the limit before the
# boxplot statistics are computed.
city_vs_salary_plot <- ggplot(rawData_universities_dt, aes(x = factor(City), y = Salary.Paid)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  coord_cartesian(ylim = c(0, 500000))
# Interactive plotly rendering of the same plot.
ggplotly(city_vs_salary_plot)
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).
# Find the cost of living for city and plot against median salary for university
# Find the tuition for school and plot against median salary for university
# Find the school's ranking and plot against median salary for university